from pyspark.context import SparkContext

# Local-mode SparkContext for this pair-RDD join demo.
# The original appName "word_count" was left over from another example and
# mislabels this job in the Spark UI; name it for what the script does.
sc = SparkContext(master="local", appName="rdd_join")

# Sample pair RDDs keyed by a string id. The key sets overlap only
# partially ('004' has no name, '005' has no age) so the different
# join flavors below produce visibly different results.
name_pairs = [
    ('001', '张三'),
    ('002', '李四'),
    ('003', '王五'),
    ('005', '赵六'),
]
age_pairs = [
    ('001', 23),
    ('002', 24),
    ('003', 25),
    ('004', 26),
]

names_rdd = sc.parallelize(name_pairs)
ages_rdd = sc.parallelize(age_pairs)

# --- Join demos on (key, value) pair RDDs ---
# collect() brings results back to the driver before printing.
# The original used foreach(print), which executes on the executors:
# in cluster mode the output never reaches the driver's stdout, and
# even locally the print order is nondeterministic across partitions.

# Inner join: only keys present in both RDDs ('001'..'003').
# Computed once and reused below instead of re-joining at the end.
joined = names_rdd.join(ages_rdd)
for record in joined.collect():
    print(record)

print('=' * 100)

# Left outer join: every key from names_rdd; a missing age becomes None.
for record in names_rdd.leftOuterJoin(ages_rdd).collect():
    print(record)

print('=' * 100)

# Full outer join: keys from either side; missing values become None.
for record in names_rdd.fullOuterJoin(ages_rdd).collect():
    print(record)

# Separator added for consistency with the sections above.
print('=' * 100)

# Flatten (key, (name, age)) into (key, name, age) tuples.
for record in joined.map(lambda kv: (kv[0], kv[1][0], kv[1][1])).collect():
    print(record)

# Release the Spark resources held by this application.
sc.stop()